In this script, we compute a 2-D t-SNE embedding of the SFA output (10 factors, L2-normalized with the norm_effects() function) for the shrunken posterior mean estimates of the most active GTEx cis-eQTLs.

# Read the gene annotation table for the eQTL genes as a plain data.frame.
# Its first column is the gene ID that the GTEx names are matched against below.
gene_info_eqtl <- data.frame(fread("../data/gene_info_eqtl.txt"))

# Number of IDs that found matches (no repeats).
nrow(gene_info_eqtl)
## [1] 65774

# Row position of the first occurrence of every distinct gene ID.
# All IDs are unique in this case (Check !!).
index_unique_IDs <- which(!duplicated(gene_info_eqtl[[1]]))

# Gene-SNP pair names, flattened into a plain vector; the first 15 characters
# of each name are taken as the gene identifier.
genesnp.names <- as.vector(as.matrix(read.table("../data/gene_snp_names.txt")))
gene_names_GTEX <- substr(genesnp.names, 1, 15)

# For every GTEx gene, the row of gene_info_eqtl holding its annotation
# (NA when the gene has no annotation), plus the positions of the NA entries.
matching_labels_eqtls_to_gene_info <- match(gene_names_GTEX, gene_info_eqtl[[1]])
index_not_found <- which(is.na(matching_labels_eqtls_to_gene_info))

Perform t-SNE on the 16407 most active cis-eQTLs, then filter the data so that only the cis genes for which BioMart information is available are retained.

# Load the 2-D t-SNE coordinates, one row per eQTL.
# Assumes rows are in the same order as gene_names_GTEX -- TODO confirm.
# NOTE(review): the file name says "unshrink" although the script introduction
# describes shrunken estimates -- confirm this is the intended input.
tsne_data <- read.table("../data/tsne_eQTL_sfa_unshrink.txt")

# Keep only the eQTLs whose gene was found in gene_info_eqtl.
# A logical mask is used instead of `-index_not_found`: negative indexing with
# an empty index vector selects zero rows, so the old form silently dropped
# every row whenever all genes were matched.
has_gene_info <- !is.na(matching_labels_eqtls_to_gene_info)
temp <- tsne_data[has_gene_info, ]

# Rows of gene_info_eqtl that annotate the retained eQTLs, in the same order.
matching_labels <- matching_labels_eqtls_to_gene_info[has_gene_info]

# Human-readable label per retained eQTL: gene ID, gene name, description and
# chromosome. Used as row names so the interactive plots can show it.
flag <- paste0(
  gene_info_eqtl[matching_labels, 1], ", ",
  "name-", gene_info_eqtl$Associated.Gene.Name[matching_labels], ", ",
  "descr-", gene_info_eqtl$Description[matching_labels], ",",
  "chr-", gene_info_eqtl$Chromosome.Name[matching_labels]
)

rownames(temp) <- flag

# Posterior mean matrix, filtered with the same mask so that its rows stay
# aligned with `temp`.
post.mean.shrink <- read.table("../data/post.mean.et_withbma.normalized.txt")
#post.mean.shrink=data_max_tscore[complete.cases(data_max_tscore),-c(1,2,47)];
post.mean.shrink.matched <- post.mean.shrink[has_gene_info, ]

rownames(post.mean.shrink.matched) <- rownames(temp)

# Number of eQTLs whose curves are drawn in the interactive curve plot.
n_index <- 2000

# Rank eQTLs by the row-wise maximum of post.mean.shrink.matched and keep the
# top 10000. The count is capped at the number of rows so that a smaller table
# does not produce NA indices (the old `[1:10000]` did).
n_tissue_specific <- min(10000, nrow(post.mean.shrink.matched))
tissue_specific_index <- head(
  order(apply(post.mean.shrink.matched, 1, max), decreasing = TRUE),
  n_tissue_specific
)

# Static scatter plot of the selected eQTLs in t-SNE space.
plot(
  temp[tissue_specific_index, 1],
  temp[tissue_specific_index, 2],
  xlab = "t-SNE projection 1",
  ylab = "t-SNE projection 2"
)

# Interactive version of the same scatter plot: every point is in group 1 and
# is labelled with its row name.
suppressMessages(suppressWarnings(iplot(
  temp[tissue_specific_index, 1],
  temp[tissue_specific_index, 2],
  rep(1, n_tissue_specific),
  rownames(temp)[tissue_specific_index]
)))

# Interactive curves for the first n_index rows, linked to their t-SNE
# coordinates. n_index replaces the previously hard-coded 1:2000 and is capped
# at the number of rows.
# NOTE(review): this uses the first rows in file order, not
# tissue_specific_index -- confirm that is intended.
# NOTE(review): 1:44 presumably matches the number of columns of
# post.mean.shrink.matched -- confirm.
curve_rows <- seq_len(min(n_index, nrow(post.mean.shrink.matched)))
suppressMessages(suppressWarnings(iplotCurves(
  post.mean.shrink.matched[curve_rows, ],
  1:44,
  temp[curve_rows, ]
)))